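# Input:    ./data/finalData.RData
# Output:   ./output/figures/coefPlot_interrupted.pdf
#           ../Paper/Figures/coefPlot_interrupted_SI.pdf
#           ../Paper/Tables/coefPlot_interrupted.tex
#           (all paths are relative to the directory set by setwd() below)
# Author:   JB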


rm(list = ls())
require(lme4)
require(lfe)
require(tidyverse)
require(ggridges)

# Report the starting directory, then switch to the replication root. Every
# relative path used further down is resolved against this directory.
getwd()
setwd('C:/Users/bisbeejh/Dropbox/FED/FED/Paper/JOP/RR1_replication/')

# Restore the saved workspace; it is expected to provide `utterance_level`.
load('./data/finalData.RData')

# Names of the SENT_* columns, leaving out the lagged / error variants and
# anything matching SEVERE, AUTHOR or LIKELY.
dims <- utterance_level %>%
  select(matches('SENT_'), -matches('_lag|error')) %>%
  select(-matches('SEVERE|AUTHOR|LIKELY')) %>%
  colnames()


# Speaker intercepts, controlling for document, restricted to speakers with at
# least 100 utterances (all >= 100).
#
# Linear model of `interrupted` on:
#   - one dummy per speaker (opensecretsID suffixed with `position`, so a
#     speaker gets a separate intercept for each position held),
#   - cubic polynomials of scaled log(nchars) and log(tot_utterances),
#   - the 100 topic loadings topic_1 ... topic_100,
#   - the scaled lagged version of every column in `dims` whose name does not
#     contain 'comb',
#   - `interruptor`.
# felm formula parts: covariates | projected-out FE (docID) | no IV |
# cluster variables (opensecretsID and docID).
#
# The 'comb' columns are dropped with a logical mask rather than
# dims[-which(...)]: when nothing matches, -which() yields integer(0) and the
# subset would be empty, producing a malformed `scale(_lag)` term.
mod <- felm(as.formula(paste0('interrupted ~ factor(opensecretsID)',
                              ' + poly(scale(log(nchars)),3) + poly(scale(log(tot_utterances)),3) + ',
                              paste(paste0('topic_',1:100),collapse = ' + '),
                              ' + ',
                              paste(paste0('scale(',dims[!grepl('comb',dims)],'_lag)'),collapse = ' + '),
                              ' + interruptor',
                              '| docID | 0 | opensecretsID + docID')),
            utterance_level %>% 
              filter(all >= 100 & !grepl('EXPERT',opensecretsID) & ind > mind) %>%
              mutate(opensecretsID = paste0(opensecretsID,position)))

# Quick sanity check: first rows of the coefficient table.
summary(mod)$coefficients %>%
  data.frame() %>%
  head()

# Plotting table for the main figure: one row per estimated speaker intercept
# (est, se, tstat, pval), joined to speaker attributes and given a display
# label `id`.
toplot <- summary(mod)$coefficients %>%
  data.frame() %>%
  # Shorten the coefficient-table headers. Substitutions are applied in the
  # order p-value, t-statistic, standard error, estimate.
  rename_all(function(nm) {
    nm <- gsub('Pr...t..','pval',nm)
    nm <- gsub('t.value','tstat',nm)
    nm <- gsub('Cluster.s.e.','se',nm)
    gsub('Estimate','est',nm)
  }) %>%
  mutate(covs = rownames(.)) %>%
  # Keep only the speaker dummies that were actually estimated.
  filter(
    grepl('opensecrets',covs),
    !is.na(est)
  ) %>%
  # Strip the factor() wrapper so covs holds the bare speaker key.
  mutate(covs = gsub('factor\\(opensecretsID\\)','',covs)) %>%
  as_tibble() %>%
  # Speaker attributes, keyed the same way as in the model
  # (opensecretsID pasted together with position).
  left_join(
    utterance_level %>%
      select(opensecretsID,position,party,gender,stab,speaker,name,all) %>%
      distinct() %>%
      mutate(opensecretsID = paste0(opensecretsID,position)),
    by = c('covs' = 'opensecretsID')
  ) %>%
  mutate(
    stab = ifelse(grepl('Chairperson',covs),'DC',stab),
    # Build "Name [position: party-state (gender)]", then abbreviate it step
    # by step.
    id = paste0(str_to_title(name),' [',position,': ',party,'-',stab,' (',gender,')]'),
    id = gsub('Fed Chair: ','',id),
    id = gsub('Committee','Comm',id),
    id = gsub('Legislator','MC',id),
    id = gsub(' Chair','',id),
    fedID = ifelse(grepl('FED',covs),'Fed Chair','Not')
  )

# One colour per row of toplot, taken in ascending order of the estimate:
# blue for committee chairs, red for Fed chairs, grey40 otherwise. No active
# code in this script reads it; the only reference is a commented-out
# axis.text.y theme setting.
cols <- with(
  toplot[order(toplot$est), ],
  ifelse(position == 'Committee Chair','blue',
         ifelse(position == 'Fed Chair','red','grey40'))
)

# Main-text figure: speaker intercepts with +/- 2 s.e. bars, ordered by
# estimate; colour and shape encode `position`, point size encodes `all`.
pdf('./output/figures/coefPlot_interrupted.pdf',width = 7,height = 7)
# The plot is print()ed explicitly. A bare ggplot expression is only drawn by
# top-level auto-printing, which does not happen when this file is run through
# source(), and the PDF would then be written without the figure.
print(
  toplot %>%
    ggplot(aes(x = est,y = reorder(id,est),color = position,shape = position)) + 
    geom_vline(xintercept = 0,linetype = 'dashed') + 
    geom_errorbarh(aes(xmin = est-2*se,xmax = est+2*se),height = 0,linewidth = .1) + 
    geom_point(aes(size = all),fill = 'white') + 
    theme_ridges() + 
    scale_color_manual(name = '',values = c('grey50','black','grey40')) +
    scale_shape_manual(name = '',values = c(15,19,21)) + 
    scale_size_continuous(name = 'Total Utterances',breaks = c(100,500,1000),range = c(2,7)) + 
    scale_y_discrete(expand = c(.05,0)) + 
    # Fed chairs with negative estimates: label starts at the right end of
    # the error bar.
    geom_text(data = toplot %>%
                filter(position == 'Fed Chair',
                       est < 0),show.legend  = FALSE,
              aes(x = est+2*se,y = reorder(id,est),label = gsub('\\[Fed-DC |\\]','',id)),size = 5,hjust = 0) + 
    # Fed chairs with positive estimates: label ends just left of the error
    # bar.
    geom_text(data = toplot %>%
                filter(position == 'Fed Chair',
                       est > 0),show.legend  = FALSE,
              aes(x = est-2*se,y = reorder(id,est),label = gsub('\\[Fed-DC |\\]','',id)),size = 5,hjust = 1.1,vjust = .5) + 
    # Legislators and committee chairs: the text between the last pair of
    # parentheses in `id` (the gender field) is drawn on top of the point,
    # in white for committee chairs.
    geom_text(data = toplot %>%
                filter(position == 'Legislator'),show.legend  = FALSE,
              aes(x = est,y = reorder(id,est),label = gsub('.*\\(|\\).*','',id)),size = 2.5,hjust = .5,vjust = .5) + 
    geom_text(data = toplot %>%
                filter(position == 'Committee Chair'),show.legend  = FALSE,
              aes(x = est,y = reorder(id,est),label = gsub('.*\\(|\\).*','',id)),size = 2.5,hjust = .5,vjust = .5,color = 'white') + 
    xlab('Fixed Effect Intercept (Reference = Bernanke)') + 
    theme(legend.position = 'right',legend.box="vertical",panel.grid.major.y = element_blank(),
          axis.text.y = element_blank()) + #element_text(color = cols,size = 10,hjust = 1,vjust = .5)) + 
    ylab('') + 
    xlim(c(-.2,.2))
)
dev.off()


# Regression table for the main-text model (LaTeX output)
# Write the model currently held in `mod` to a LaTeX table. Only coefficients
# whose names match FED / nchars / utterances / SENT / interrupt are shown;
# the add.lines rows flag the control sets that are in the model but not
# printed. Significance marks: dagger < .1, * < .05, ** < .01, *** < .001.
#
# The call is namespace-qualified because this script never attaches
# stargazer (it is not among the require() calls at the top), so a bare
# stargazer() fails in a fresh session.
stargazer::stargazer(mod,
                     keep = 'FED|nchars|utterances|SENT|interrupt',
                     add.lines = list(c('100 LDA Topic Loadings','Y','Y','Y','Y'),
                                      c('Tone Probabilities','Y','Y','Y','Y'),
                                      c('Hearing FE','Y','Y','Y','Y'),
                                      c('Speaker FE','Y','Y','Y','Y')),
                     star.char = c('\\dag','*','**','***'),star.cutoffs = c(.1,.05,.01,.001),
                     keep.stat = c('n','rsq'),out = '../Paper/Tables/coefPlot_interrupted.tex')


# for appendix
# Appendix specification; overwrites `mod`. Compared with the main model it
# keeps speakers with more than 30 utterances (all > 30), uses every column in
# `dims` unlagged, adds gender, scaled age / seniority / votepct, anyBill and
# yellen_vote, leaves opensecretsID as is (no position suffix), and clusters
# on opensecretsID only.
#
# The formula used to list poly(scale(log(nchars)),3) twice; the duplicate
# term is removed so each regressor appears once.
mod <- felm(as.formula(paste0('interrupted ~ factor(opensecretsID)',
                              ' + poly(scale(log(nchars)),3) + poly(scale(log(tot_utterances)),3) + ',
                              paste(paste0('topic_',1:100),collapse = ' + '),
                              ' + ',
                              paste(paste0('scale(',dims,')'),collapse = ' + '),
                              ' + interruptor + gender + scale(age) + scale(seniority) + scale(votepct)',
                              ' + anyBill + yellen_vote',
                              '| docID | 0 | opensecretsID')),
            utterance_level %>% filter(all > 30 & !grepl('EXPERT',opensecretsID) & ind > mind))

# Print the full model summary.
summary(mod)

# Plotting table for the appendix figure; overwrites `toplot`. One row per
# estimated speaker intercept, joined to speaker attributes on the bare
# opensecretsID.
toplot <- summary(mod)$coefficients %>%
  data.frame() %>%
  # Shorten the coefficient-table headers. Substitutions are applied in the
  # order p-value, t-statistic, standard error, estimate.
  rename_all(function(nm) {
    nm <- gsub('Pr...t..','pval',nm)
    nm <- gsub('t.value','tstat',nm)
    nm <- gsub('Cluster.s.e.','se',nm)
    gsub('Estimate','est',nm)
  }) %>%
  mutate(covs = rownames(.)) %>%
  # Keep only the speaker dummies that were actually estimated.
  filter(
    grepl('opensecrets',covs),
    !is.na(est)
  ) %>%
  # Strip the factor() wrapper so covs holds the bare speaker key.
  mutate(covs = gsub('factor\\(opensecretsID\\)','',covs)) %>%
  as_tibble() %>%
  left_join(
    utterance_level %>%
      select(opensecretsID,position,party,gender,stab,speaker,name,all) %>%
      distinct(),
    by = c('covs' = 'opensecretsID')
  ) %>%
  mutate(
    # Keys containing 'Expert' / 'Chairperson' get a fixed party label.
    party = ifelse(grepl('Expert',covs),'Expert',
                   ifelse(grepl('Chairperson',covs),'FED',party)),
    stab = ifelse(grepl('Chairperson',covs),'DC',stab),
    # Axis label: "Name [party-state (gender)]".
    id = paste0(str_to_title(name),' [',party,'-',stab,' (',gender,')]'),
    fedID = ifelse(grepl('FED',covs),'Fed Chair','Not')
  )

# Appendix figure: speaker intercepts with +/- 2 s.e. bars, ordered by
# estimate and labelled on the y axis; colour separates Fed chairs from
# everyone else, point size encodes `all`, and the value of `all` is printed
# next to each point.
pdf('../Paper/Figures/coefPlot_interrupted_SI.pdf',width = 9,height = 11)
# The plot is print()ed explicitly. A bare ggplot expression is only drawn by
# top-level auto-printing, which does not happen when this file is run through
# source(), and the PDF would then be written without the figure.
print(
  toplot %>%
    # filter(pval < .05) %>%
    ggplot(aes(x = est,y = reorder(id,est),color = fedID,label = all)) + 
    geom_vline(xintercept = 0,linetype = 'dashed') + 
    geom_errorbarh(aes(xmin = est-2*se,xmax = est+2*se),height = 0) + 
    geom_point(aes(size = all),shape = 21,fill = 'white') + 
    theme_ridges() + 
    # Only the 'Fed Chair' colour gets a legend entry.
    scale_color_manual(name = '',values = c('red','grey40'),breaks = c('Fed Chair'),labels = c('Fed Chair')) + 
    scale_size_continuous(name = 'Total Utterances',breaks = c(20,100,500,1000),range = c(2,7)) + 
    xlab('Fixed Effect Intercept (Reference = Bernanke)') + 
    theme(legend.position = 'bottom',legend.box="vertical") + 
    # Utterance count (label = all), shifted right of the point.
    geom_text(nudge_x = .05,hjust = 0,size = 2.5) + 
    theme(axis.text.y = element_text(size = 6)) + 
    ylab('')
)
dev.off()